In [1]:
from ydata_profiling import ProfileReport
import pandas as pd
In [2]:
df = pd.read_csv("data/Austin_data.tsv", sep= '\t')
print(df)
C:\Users\kaush\AppData\Local\Temp\ipykernel_2900\1676182894.py:1: DtypeWarning: Columns (7,53) have mixed types. Specify dtype option on import or set low_memory=False.
df = pd.read_csv("data/Austin_data.tsv", sep= '\t')
crash_id crash_fatal_fl crash_date crash_time case_id \
0 13762420 N 03/30/2014 10:58:00 AM 10:58:00 140890874
1 13777334 N 03/27/2014 01:07:00 PM 13:07:00 140860852
2 13777441 N 03/28/2014 03:42:00 PM 15:42:00 140871196
3 13797332 N 04/09/2014 02:09:00 PM 14:09:00 140991015
4 13795604 N 04/07/2014 06:00:00 PM 18:00:00 140971248
... ... ... ... ... ...
147745 20060069 N 03/05/2024 03:23:00 AM 03:23:00 240650140
147746 20056192 N 03/01/2024 09:28:00 PM 21:28:00 240611420
147747 20083436 N 03/10/2024 12:16:00 AM 00:16:00 240700030
147748 20049322 N 02/28/2024 01:27:00 AM 01:27:00 240590075
147749 20047391 N 02/22/2024 07:57:00 PM 19:57:00 240531383
rpt_latitude rpt_longitude rpt_block_num rpt_street_pfx \
0 NaN NaN NaN NaN
1 NaN NaN 3400 NaN
2 NaN NaN 8704 NaN
3 NaN NaN 8000 NaN
4 NaN NaN 200 W
... ... ... ... ...
147745 NaN NaN 8800.0 NaN
147746 30.34404 -97.71144 7635.0 N
147747 NaN NaN 3500.0 NaN
147748 NaN NaN 3800.0 NaN
147749 30.15390 -97.79219 10100.0 S
rpt_street_name ... pedestrian_serious_injury_count \
0 3707 MANCHACA ... 0
1 PALM WAY TO MOPAC NB RAMP ... 0
2 BALCONES CLUB DR ... 0
3 E US 290 HWY SVRD EB ... 0
4 BEN WHITE ... 0
... ... ... ...
147745 N IH 35 NB ... 0
147746 GUADALUPE ST ... 0
147747 MONTOPOLIS DR ... 0
147748 SPICEWOOD SPRINGS RD EB ... 0
147749 S IH 35 SVRD SB ... 0
motorcycle_death_count motorcycle_serious_injury_count \
0 0 0
1 0 0
2 0 0
3 0 0
4 0 0
... ... ...
147745 0 0
147746 0 0
147747 0 0
147748 0 0
147749 0 0
other_death_count other_serious_injury_count onsys_fl private_dr_fl \
0 0 0 N N
1 0 0 N N
2 0 0 N N
3 0 0 Y N
4 0 0 Y N
... ... ... ... ...
147745 0 0 Y N
147746 0 0 N N
147747 0 0 N N
147748 0 0 N N
147749 0 0 Y N
micromobility_serious_injury_count micromobility_death_count \
0 0 0
1 0 0
2 0 0
3 0 0
4 0 0
... ... ...
147745 0 0
147746 0 0
147747 0 0
147748 0 0
147749 0 0
micromobility_fl
0 NaN
1 NaN
2 NaN
3 NaN
4 NaN
... ...
147745 NaN
147746 NaN
147747 NaN
147748 NaN
147749 NaN
[147750 rows x 54 columns]
In [3]:
profile = ProfileReport(df, title = "Austin Data Report")
In [4]:
profile.to_notebook_iframe()
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]